import HTMLParser
import urllib
import re


# Page that is downloaded and scanned for AS numbers.
urlString = "http://www.nagoya.jpix.ad.jp/user/index.html"
# Unique AS numbers (digit strings without the "AS" prefix) appended by
# Parse165.handle_data, in the order they are first encountered.
urlText = []
# Output file; it is opened in append mode, so existing content is kept.
pathOut = "ixp_members.txt"


class Parse165 (HTMLParser.HTMLParser):
    """Collect AS numbers that appear as the text of HTML table cells.

    Every time a ``<td>`` start tag is seen the parser is armed; the first
    text chunk delivered after that is tested against ``AS<digits>``.  On a
    match the digits are appended to the module-level ``urlText`` list,
    unless they are already present.  The parser is disarmed after that
    first text chunk whether or not it matched.
    """

    # Text made only of "AS" followed by digits, with optional surrounding
    # whitespace.  Raw string so that \s and \d reach the regex engine
    # verbatim; compiled once instead of on every text chunk.
    _AS_PATTERN = re.compile(r"^\s*AS(\d+)\s*$")

    # True between a <td> start tag and the first text chunk that follows.
    check = False

    def handle_starttag (self, tag, attrs):
        """Arm the parser when a ``<td>`` element opens."""
        if tag == "td":
            self.check = True

    def handle_data (self, data):
        """Record the AS number in ``data`` if the parser is armed.

        Only the first text chunk after a ``<td>`` start tag is examined;
        any later chunk is ignored until the next ``<td>``.
        """
        if not self.check:
            return
        m = self._AS_PATTERN.match(data)
        if m:
            number = m.group(1)
            # Keep urlText free of duplicates while preserving page order.
            if number not in urlText:
                urlText.append(number)
        self.check = False


lparser = Parse165()
page = urllib.urlopen(urlString).read()
# Repair the HTML where a closing angle bracket is missing after "</a"
# (line 171 of the page); otherwise the parser does not work.
# The replacement must be a raw string: in a normal string "\1" is the
# control character \x01, not a backreference, so the character captured
# after "</a" would be dropped and replaced by \x01.
page = re.sub(r"</a([^>])", r"</a>\1", page)
lparser.feed(page)
# Append one "165 <AS number>" line per collected AS number; the with-block
# closes the file even if a write fails.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        fileOut.write("165 %s\n" % item)